import tensorflow as tf
import numpy as np
import pandas as pd
import re
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.layers import Input, Dropout, Dense, BatchNormalization, Activation, concatenate, GRU, Embedding, Flatten

# Load the article/summary pairs and drop rows with missing values.
df = pd.read_csv("summarize_text.csv")
df = df[['article', 'highlights']].dropna()


def preprocess_text(text):
    """Normalize raw text for tokenization.

    Lowercases, strips parenthesized asides and double quotes, replaces
    every character other than letters, digits and basic punctuation
    (? . ! ,) with a space, then collapses runs of whitespace.
    """
    text = text.lower()
    text = re.sub(r'\([^)]*\)', '', text)          # drop "(...)" asides
    text = re.sub('"', '', text)                   # drop double quotes
    text = re.sub(r'[^a-zA-Z?.!,\d]', ' ', text)   # keep letters/digits/?.!,
    text = re.sub(r'\s+', ' ', text)               # collapse whitespace
    return text
# Clean both columns with the shared normalizer.
df['article'] = df['article'].apply(preprocess_text)
df['highlights'] = df['highlights'].apply(preprocess_text)

# Tokenizer for the input (article) text.
text_tokenizer = Tokenizer()
text_tokenizer.fit_on_texts(df['article'])
X_train = text_tokenizer.texts_to_sequences(df['article'])
# +1 leaves room for at least one trailing pad position.
max_article_length = max(len(seq) for seq in X_train) + 1
X_train_padded = pad_sequences(X_train, maxlen=max_article_length, padding='post')
print(X_train_padded.shape)  # notebook run showed (9, 866)
# Wrap each target summary with explicit start/end markers for the decoder.
df['highlights'] = df['highlights'].apply(lambda s: f"startofseq {s} endofseq")

# Tokenizer for the target (highlights) text.
summary_tokenizer = Tokenizer()
summary_tokenizer.fit_on_texts(df['highlights'])
Y_train = summary_tokenizer.texts_to_sequences(df['highlights'])
# +1 leaves room for at least one trailing pad position.
max_highlights_length = max(len(seq) for seq in Y_train) + 1
Y_train_padded = pad_sequences(Y_train, maxlen=max_highlights_length, padding='post')
print(Y_train_padded.shape)  # notebook run showed (9, 94), max_highlights_length == 94
# Vocabulary sizes (+1 because index 0 is reserved for padding).
text_vocab_size = len(text_tokenizer.word_index) + 1
summary_vocab_size = len(summary_tokenizer.word_index) + 1


class AttentionLayer(tf.keras.layers.Layer):
    """Bahdanau-style additive attention over the encoder time axis.

    NOTE(review): this layer does not propagate the Keras embedding mask,
    so padded encoder positions still receive attention weight — the
    training run logged a UserWarning about the destroyed mask.
    """

    def __init__(self, units):
        super().__init__()
        self.W1 = tf.keras.layers.Dense(units)  # projects encoder states
        self.W2 = tf.keras.layers.Dense(units)  # projects decoder states
        self.V = tf.keras.layers.Dense(1)       # collapses to a scalar score

    def call(self, encoder_outputs, decoder_outputs):
        # (batch, 1, dec_steps, dec_dim)
        decoder_expanded = tf.expand_dims(decoder_outputs, 1)
        # (batch, enc_steps, 1, enc_dim)
        encoder_reshaped = encoder_outputs[:, :, None, :]
        # Additive score for every (encoder step, decoder step) pair:
        # (batch, enc_steps, dec_steps, 1)
        score = self.V(tf.nn.tanh(self.W1(encoder_reshaped) + self.W2(decoder_expanded)))
        # Normalize across the encoder time axis.
        attention_weights = tf.nn.softmax(score, axis=1)
        # Weighted sum of encoder states: (batch, dec_steps, enc_dim)
        context_vector = tf.reduce_sum(attention_weights * encoder_reshaped, axis=1)
        return context_vector, attention_weights


from tensorflow.keras.layers import Bidirectional, LSTM, Embedding, Dense, Input, Concatenate
embed_size = 25
attention_units = 512

# ----- Encoder -----
encoder_inputs = Input(shape=(max_article_length,))
encoder_embedding = Embedding(text_vocab_size, embed_size, mask_zero=True)(encoder_inputs)
encoder_lstm = Bidirectional(LSTM(512, return_state=True, return_sequences=True))
encoder_outputs, forward_h, forward_c, backward_h, backward_c = encoder_lstm(encoder_embedding)
# Merge forward/backward states so they match the 1024-unit decoder LSTM.
state_h = Concatenate()([forward_h, backward_h])
state_c = Concatenate()([forward_c, backward_c])
encoder_states = [state_h, state_c]

# ----- Decoder (teacher forcing: full target sequence fed at once) -----
decoder_inputs = Input(shape=(None,))
decoder_embedding = Embedding(summary_vocab_size, embed_size, mask_zero=True)(decoder_inputs)
decoder_lstm = LSTM(1024, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)

# ----- Attention over encoder outputs -----
attention_layer = AttentionLayer(units=attention_units)
attention_outputs, attention_weights = attention_layer(encoder_outputs, decoder_outputs)

# Concatenate decoder states with their attention context, then project to vocab.
decoder_combined_context = Concatenate()([decoder_outputs, attention_outputs])
output_layer = Dense(summary_vocab_size, activation='softmax')
outputs = output_layer(decoder_combined_context)

model = tf.keras.models.Model([encoder_inputs, decoder_inputs], outputs)
# Targets are integer token ids, hence sparse categorical cross-entropy.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()
# NOTE(review): the notebook run reported 8,148,840 trainable parameters and a
# UserWarning that AttentionLayer destroys the embedding mask (see class note).
# Teacher-forcing data: decoder input is the summary without its last token,
# decoder target is the summary shifted one step left (without the start token).
decoder_input_data = Y_train_padded[:, :-1]
decoder_output_data = Y_train_padded[:, 1:]
print("Decoder input shape:", decoder_input_data.shape)    # notebook run: (9, 93)
print("Decoder output shape:", decoder_output_data.shape)  # notebook run: (9, 93)

# Training
history = model.fit(
    [X_train_padded, decoder_input_data],  # encoder input and decoder input
    decoder_output_data,                   # decoder target tokens
    epochs=1,
    validation_split=0.2,
)
# ----- Inference models -----
# Encoder: maps a padded article to its per-step outputs plus the merged states.
encoder_model = tf.keras.models.Model(encoder_inputs, [encoder_outputs, state_h, state_c])

# Decoder step model: previous token + carried states + the (fixed) encoder
# outputs for attention.  NOTE(review): the original wired the *training-graph*
# tensor `encoder_outputs` in as a model input and clobbered the training
# names; a dedicated Input layer is the supported way to feed it at inference.
decoder_state_input_h = tf.keras.layers.Input(shape=(1024,))
decoder_state_input_c = tf.keras.layers.Input(shape=(1024,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
encoder_outputs_input = tf.keras.layers.Input(shape=(max_article_length, 1024))

dec_outputs_inf, dec_state_h, dec_state_c = decoder_lstm(
    decoder_embedding, initial_state=decoder_states_inputs
)
attention_outputs_inf, _ = attention_layer(encoder_outputs_input, dec_outputs_inf)
combined_context_inf = tf.keras.layers.concatenate([dec_outputs_inf, attention_outputs_inf])
dec_predictions = output_layer(combined_context_inf)

decoder_model = tf.keras.models.Model(
    [decoder_inputs] + decoder_states_inputs + [encoder_outputs_input],
    [dec_predictions, dec_state_h, dec_state_c],
)


def summarize_text(input_text):
    """Greedily decode a summary for a single raw input string.

    Tokenizes/pads the input like the training articles, encodes it once,
    then decodes one token at a time until 'endofseq', an unknown index,
    or `max_highlights_length` steps.  Returns the summary text without
    the start/end markers.
    """
    input_seq = text_tokenizer.texts_to_sequences([input_text])
    input_seq = pad_sequences(input_seq, maxlen=max_article_length, padding='post')

    # Encode once: states seed the decoder, outputs feed attention each step.
    enc_outs, h, c = encoder_model.predict(input_seq)

    # Start decoding from the start-of-sequence token.
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = summary_tokenizer.word_index['startofseq']

    summary_words = []
    for _ in range(max_highlights_length):
        output_tokens, h, c = decoder_model.predict([target_seq, h, c, enc_outs])
        predicted_token = np.argmax(output_tokens[0, -1, :])
        word = summary_tokenizer.index_word.get(predicted_token, '')
        # Stop on the end marker or an index with no known word.
        if word in ('endofseq', ''):
            break
        summary_words.append(word)
        # Feed the prediction back in as the next decoder input.
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = predicted_token

    return ' '.join(summary_words)
# Example input text
text_sample = "The stock market saw a significant drop today due to global inflation fears and economic downturn signals."

# Generate and display the summary (after one epoch on nine rows the model
# may emit an empty summary, as in the recorded notebook run).
summary = summarize_text(text_sample)
print("Generated Summary:", summary)